Required Packages
library(utils)
library(dplyr)
library(ggplot2)
library(maps)
library(stringr)
library(readr)
library(tidyverse)
library(readxl)
library(plotly)
library(MASS)
library(kableExtra)
library(broom)
library(stargazer)
library(ggfortify)
Cleaning Data
# Read the ECDC case-distribution CSV into R; the raw table is called "data".
data <- read.csv(
  "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",
  na.strings = "",
  fileEncoding = "UTF-8-BOM"
)
# Clean the raw table (dplyr supplies %>% and the verbs used below):
#   * rename the date and country columns to Date and Region,
#   * parse Date and replace underscores in Region with spaces,
#   * add per-region row-to-row differences of cases and deaths,
#   * drop rows that have no country/territory code.
# NOTE(review): lag() reads the previous row of each Region, so the sign and
# direction of the *_Diff columns depend on the row order of the downloaded
# file -- confirm that order before interpreting them.
covid.data <- data %>%
  rename(Date = "dateRep", Region = "countriesAndTerritories") %>%
  mutate(
    # The ECDC feed writes dateRep as dd/mm/yyyy. %Y reads the full year;
    # %y would consume only the first two digits ("20") and silently turn
    # every 2019 date into a 2020 date.
    Date = as.Date(Date, format = "%d/%m/%Y"),
    Region = str_replace_all(Region, "_", " ")
  ) %>%
  group_by(Region) %>%
  mutate(
    cases_Diff = lag(cases) - cases,
    deaths_Diff = lag(deaths) - deaths
  ) %>%
  relocate(cases_Diff, .after = cases) %>%
  relocate(deaths_Diff, .after = deaths) %>%
  drop_na(countryterritoryCode) %>%
  # Grouping was only needed for the per-region lag; release it so later
  # steps operate on an ordinary, ungrouped table.
  ungroup()
Data Summary
# Population table: region name, 2019 population, continent and country code.
pop.data <- data.frame(
  Region = covid.data$Region,
  Population2019 = covid.data$popData2019,
  Continents = covid.data$continentExp,
  Codes = covid.data$countryterritoryCode
)
# Collapse repeated rows, discard rows with any missing field, then collapse
# repeated rows once more.
pop.data <- unique(na.omit(unique(pop.data)))
# Cases table: total reported cases per region plus the region's country code.
case.data <- aggregate(
  covid.data$cases,
  by = list(Category = covid.data$Region),
  FUN = sum
)
names(case.data) <- c("Region", "cases")
# Look the code up by Region (first matching row). aggregate() returns its
# groups in sorted order, so binding unique(codes) on by position is only
# correct when the file happens to be stored in exactly that order.
case.data$Codes <- covid.data$countryterritoryCode[
  match(case.data$Region, covid.data$Region)
]
# sum() has no na.rm, so a region with any missing count totals NA and is
# removed here.
case.data <- na.omit(case.data)
# Deaths table: total reported deaths per region plus the region's country code.
death.data <- aggregate(
  covid.data$deaths,
  by = list(Category = covid.data$Region),
  FUN = sum
)
names(death.data) <- c("Region", "deaths")
# Look the code up by Region (first matching row). aggregate() returns its
# groups in sorted order, so binding unique(codes) on by position is only
# correct when the file happens to be stored in exactly that order.
death.data$Codes <- covid.data$countryterritoryCode[
  match(death.data$Region, covid.data$Region)
]
# sum() has no na.rm, so a region with any missing count totals NA and is
# removed here.
death.data <- na.omit(death.data)
# Deaths-per-case ratio for each region.
# The two totals are joined on Region before dividing: case.data and
# death.data are filtered independently, so dividing their columns by row
# position pairs the wrong regions as soon as one table loses a row.
death_case.data <- local({
  joined <- merge(case.data, death.data[, c("Region", "deaths")], by = "Region")
  data.frame(
    Region = joined$Region,
    `Deaths/Case Ratio` = joined$deaths / joined$cases,
    Codes = joined$Codes,
    check.names = FALSE  # keep the non-syntactic column name as written
  )
})
# Deaths as a percentage of the 2019 population (deaths / population * 100).
# pop.data keeps its own row order and drops rows with missing fields, so it
# is likewise joined to the death totals on Region rather than by position.
death_pop.data <- local({
  joined <- merge(pop.data, death.data[, c("Region", "deaths")], by = "Region")
  data.frame(
    Region = joined$Region,
    Values = joined$deaths / joined$Population2019 * 100,
    Codes = joined$Codes
  )
})
# Land area: load the "Land Area.xls" workbook.
Land_Area <- read_excel("Land Area.xls")
# The third data row holds the real column headers; promote it to the column
# names and discard rows 1-3.
names(Land_Area) <- Land_Area[3, ]
Land_Area <- Land_Area[-(1:3), ]
# Keep columns 1, 2, 62 and 63. The code below addresses the second as
# `Country Code` and the last two as `2017` and `2018`.
Land_Area <- Land_Area[, c(1, 2, 62, 63)]
# Complete 2018: wherever the 2018 value is missing, fall back to 2017.
missing_2018 <- is.na(Land_Area$`2018`)
Land_Area$`2018`[missing_2018] <- Land_Area$`2017`[missing_2018]
# Drop rows that still contain a missing value, then drop the `2017` column.
Land_Area <- na.omit(Land_Area)[, -3]
names(Land_Area)[3] <- "Area"
# Attach the area to the population table by country code; na.omit() then
# keeps only rows that are complete on both sides, and column 5 (the first
# Land_Area column) is dropped.
combine <- full_join(pop.data, Land_Area, by = c("Codes" = "Country Code"))
combine <- na.omit(combine)[, -5]
pop.square <- combine
pop.square$Area <- as.numeric(pop.square$Area)
Plotting Data
# Population distribution: world choropleth of log2(2019 population).
pop.fig <- plot_ly(
  pop.data,
  type = "choropleth",
  locations = pop.data$Codes,
  z = log2(pop.data$Population2019),
  text = pop.data$Region,
  colorscale = "Blues",
  reversescale = TRUE
) %>%
  layout(title = "The Logarithm of World Population in 2019") %>%
  colorbar(title = "Population Rates", limits = c(15, 31))
pop.fig
# Population density distribution: world choropleth of the natural log of
# 2019 population divided by Area.
pop.square.fig <- plot_ly(
  pop.square,
  type = "choropleth",
  locations = pop.square$Codes,
  z = log(pop.square$Population2019 / pop.square$Area),
  text = pop.square$Region,
  colorscale = "Blues",
  reversescale = TRUE
) %>%
  layout(title = "The Logarithm of World Population Density in 2019") %>%
  colorbar(title = "Density Rates", limits = c(-2, 7))
pop.square.fig
# Case distribution: world choropleth of log2(total cases per region).
case.fig <- plot_ly(
  case.data,
  type = "choropleth",
  locations = case.data$Codes,
  z = log2(case.data$cases),
  text = case.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
) %>%
  layout(title = "The Logarithm of World Covid-19 Cases Number") %>%
  colorbar(title = "Cases Number", limits = c(4, 24))
case.fig
# Death distribution: world choropleth of log2(total deaths + 1) per region;
# the +1 keeps regions with zero deaths finite on the log scale.
death.fig <- plot_ly(
  death.data,
  type = "choropleth",
  locations = death.data$Codes,
  z = log2(death.data$deaths + 1),
  # Hover labels come from the same table as the plotted values, so a label
  # can never belong to a different row than its value.
  text = death.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
) %>%
  layout(title = "The Logarithm of World Covid-19 Deaths Number") %>%
  colorbar(title = "Deaths Number")
death.fig
# Death/case distribution: world choropleth of the deaths-to-cases ratio.
death_case.fig <- plot_ly(
  death_case.data,
  type = "choropleth",
  locations = death_case.data$Codes,
  z = death_case.data$`Deaths/Case Ratio`,
  text = death_case.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
) %>%
  layout(title = "The Ratio of World Covid-19 Deaths to Cases Number") %>%
  colorbar(title = "Ratio Number", limits = c(0, 0.1))
death_case.fig
# Death/population*100 distribution: world choropleth of deaths as a
# percentage of the 2019 population.
death_pop.fig <- plot_ly(
  death_pop.data,
  type = "choropleth",
  locations = death_pop.data$Codes,
  z = death_pop.data$Values,
  text = death_pop.data$Region,
  colorscale = "Reds",
  reversescale = FALSE
) %>%
  layout(title = "The Ratio of World Covid-19 Death/Population*100") %>%
  colorbar(title = "Ratio Number")
death_pop.fig
Modeling Preparation
# Create the Density variable: 2019 population divided by Area, kept together
# with the country code it belongs to.
pop.density <- pop.square %>%
  mutate(Density = Population2019 / Area)
pop.density <- pop.density[, c("Codes", "Density")]
# Create the data for the time-series models: one row per region and report,
# carrying the case/death differences, population and density.
# Columns are picked by name, in the same order the positional selection
# produced (Date, cases_Diff, deaths_Diff, Region, Codes, popData2019), so
# later positional indexing of covid.time is unaffected.
# dplyr::select is spelled out because MASS is attached after dplyr and also
# exports a select().
covid.time <- covid.data %>%
  # A grouping left on covid.data would make Region a grouping column, which
  # cannot be rewritten by mutate(); drop any grouping first.
  ungroup() %>%
  dplyr::select(
    Date, cases_Diff, deaths_Diff, Region,
    Codes = countryterritoryCode, popData2019
  ) %>%
  inner_join(pop.density, by = "Codes") %>%
  mutate(
    Region = as.factor(Region),
    # Integer id of each region = position of its factor level.
    group_id = as.integer(Region)
  )
groups <- length(unique(covid.time$Region))
# Lagged case and death differences: for k = 1..14, add a column holding the
# value found k rows earlier within the same region (NA where no such row
# exists). Columns are named "CASE_DIFF-k" and "DEATH_DIFF-k" and are
# appended in that order, cases first.
#
# ave() computes the lag inside each group_id and writes every result back to
# the row it came from, so the new columns stay aligned with covid.time no
# matter how its rows are ordered. Assigning by name also avoids growing the
# table with cbind() and renaming the new columns by position afterwards.
covid.time <- as.data.frame(covid.time)  # plain data.frame, no grouping
for (k in 1:14) {
  covid.time[[paste("CASE_DIFF", k, sep = "-")]] <- ave(
    covid.time$cases_Diff,
    covid.time$group_id,
    FUN = function(x) dplyr::lag(x, k)
  )
}
for (k in 1:14) {
  covid.time[[paste("DEATH_DIFF", k, sep = "-")]] <- ave(
    covid.time$deaths_Diff,
    covid.time$group_id,
    FUN = function(x) dplyr::lag(x, k)
  )
}
Modeling
# Model 1 - cases on cases: cases_Diff explained by its own 14 lags.
# Columns are selected by name so the model does not depend on where the lag
# columns happen to sit in covid.time.
data1 <- covid.time[, c("cases_Diff", paste("CASE_DIFF", 1:14, sep = "-"))]
# Full model (upper bound of the search) and intercept-only model (lower
# bound and starting point), both fitted on the complete rows only.
fit1.m1 <- lm(cases_Diff ~ ., data = na.omit(data1))
fit2.m1 <- lm(cases_Diff ~ 1, data = na.omit(data1))
# Stepwise AIC selection in both directions between the two bounds.
step.m1 <- stepAIC(
  fit2.m1,
  direction = "both",
  scope = list(upper = fit1.m1, lower = fit2.m1),
  trace = FALSE
)
summary(step.m1)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `CASE_DIFF-11`, data = na.omit(data1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53130 -25 -16 -4 42720
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.681509 3.821143 4.104 4.07e-05 ***
## `CASE_DIFF-7` 0.489465 0.004681 104.574 < 2e-16 ***
## `CASE_DIFF-14` 0.114185 0.003600 31.714 < 2e-16 ***
## `CASE_DIFF-1` -0.552226 0.004227 -130.642 < 2e-16 ***
## `CASE_DIFF-8` 0.356644 0.004777 74.659 < 2e-16 ***
## `CASE_DIFF-2` -0.419901 0.004742 -88.542 < 2e-16 ***
## `CASE_DIFF-9` 0.212261 0.004955 42.836 < 2e-16 ***
## `CASE_DIFF-6` 0.070500 0.004939 14.275 < 2e-16 ***
## `CASE_DIFF-12` -0.118439 0.004673 -25.344 < 2e-16 ***
## `CASE_DIFF-3` -0.253793 0.004952 -51.254 < 2e-16 ***
## `CASE_DIFF-13` -0.117971 0.004332 -27.232 < 2e-16 ***
## `CASE_DIFF-4` -0.145661 0.005046 -28.869 < 2e-16 ***
## `CASE_DIFF-10` 0.077947 0.004875 15.990 < 2e-16 ***
## `CASE_DIFF-5` -0.074434 0.005050 -14.739 < 2e-16 ***
## `CASE_DIFF-11` 0.014213 0.004820 2.949 0.00319 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 878.7 on 53164 degrees of freedom
## Multiple R-squared: 0.5888, Adjusted R-squared: 0.5887
## F-statistic: 5437 on 14 and 53164 DF, p-value: < 2.2e-16
# Model 2 - cases & deaths on cases: cases_Diff explained by the 14 lags of
# the case difference and the 14 lags of the death difference.
# Columns are selected by name so the model does not depend on where the lag
# columns happen to sit in covid.time.
data2 <- covid.time[, c(
  "cases_Diff",
  paste("CASE_DIFF", 1:14, sep = "-"),
  paste("DEATH_DIFF", 1:14, sep = "-")
)]
# Full model (upper bound of the search) and intercept-only model (lower
# bound and starting point), both fitted on the complete rows only.
fit1.m2 <- lm(cases_Diff ~ ., data = na.omit(data2))
fit2.m2 <- lm(cases_Diff ~ 1, data = na.omit(data2))
# Stepwise AIC selection in both directions between the two bounds.
step.m2 <- stepAIC(
  fit2.m2,
  direction = "both",
  scope = list(upper = fit1.m2, lower = fit2.m2),
  trace = FALSE
)
summary(step.m2)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`, data = na.omit(data2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53595 -24 -15 -3 41110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.572354 3.801912 3.833 0.000127 ***
## `CASE_DIFF-7` 0.464288 0.004828 96.157 < 2e-16 ***
## `CASE_DIFF-14` 0.109556 0.003669 29.858 < 2e-16 ***
## `CASE_DIFF-1` -0.559992 0.004262 -131.384 < 2e-16 ***
## `CASE_DIFF-8` 0.331329 0.004886 67.818 < 2e-16 ***
## `CASE_DIFF-2` -0.429274 0.004797 -89.491 < 2e-16 ***
## `CASE_DIFF-9` 0.198530 0.005027 39.491 < 2e-16 ***
## `CASE_DIFF-6` 0.054925 0.005046 10.884 < 2e-16 ***
## `CASE_DIFF-12` -0.119549 0.004748 -25.180 < 2e-16 ***
## `CASE_DIFF-3` -0.257402 0.004970 -51.792 < 2e-16 ***
## `CASE_DIFF-13` -0.121642 0.004406 -27.608 < 2e-16 ***
## `CASE_DIFF-4` -0.145509 0.005063 -28.742 < 2e-16 ***
## `CASE_DIFF-10` 0.071964 0.004943 14.558 < 2e-16 ***
## `CASE_DIFF-5` -0.080894 0.005125 -15.785 < 2e-16 ***
## `DEATH_DIFF-8` 2.188295 0.111527 19.621 < 2e-16 ***
## `DEATH_DIFF-7` 1.949829 0.102564 19.011 < 2e-16 ***
## `DEATH_DIFF-6` 1.273209 0.090964 13.997 < 2e-16 ***
## `DEATH_DIFF-9` 1.532828 0.115625 13.257 < 2e-16 ***
## `DEATH_DIFF-5` 0.639008 0.071197 8.975 < 2e-16 ***
## `DEATH_DIFF-2` 0.333297 0.063829 5.222 1.78e-07 ***
## `CASE_DIFF-11` 0.010553 0.004889 2.158 0.030910 *
## `DEATH_DIFF-10` 1.280985 0.114642 11.174 < 2e-16 ***
## `DEATH_DIFF-11` 1.225222 0.110269 11.111 < 2e-16 ***
## `DEATH_DIFF-12` 1.007917 0.102001 9.881 < 2e-16 ***
## `DEATH_DIFF-13` 0.862914 0.090589 9.526 < 2e-16 ***
## `DEATH_DIFF-14` 0.572329 0.070579 8.109 5.21e-16 ***
## `DEATH_DIFF-1` 0.176808 0.064201 2.754 0.005889 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 874.2 on 53152 degrees of freedom
## Multiple R-squared: 0.5931, Adjusted R-squared: 0.5929
## F-statistic: 2980 on 26 and 53152 DF, p-value: < 2.2e-16
# Compare models: ANOVA table for the stepwise-selected case-lag model
# (step.m1) against the stepwise-selected case- and death-lag model (step.m2).
anova(step.m1,step.m2)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `CASE_DIFF-11`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 53164 4.1052e+10
## 2 53152 4.0622e+10 12 4.3e+08 46.886 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3 - cases, deaths & population on cases: the model 2 candidates plus
# the 2019 population (popData2019).
# Columns are selected by name so the model does not depend on where they
# happen to sit in covid.time.
data3 <- covid.time[, c(
  "cases_Diff",
  "popData2019",
  paste("CASE_DIFF", 1:14, sep = "-"),
  paste("DEATH_DIFF", 1:14, sep = "-")
)]
# Full model (upper bound of the search) and intercept-only model (lower
# bound and starting point), both fitted on the complete rows only.
fit1.m3 <- lm(cases_Diff ~ ., data = na.omit(data3))
fit2.m3 <- lm(cases_Diff ~ 1, data = na.omit(data3))
# Stepwise AIC selection in both directions between the two bounds.
step.m3 <- stepAIC(
  fit2.m3,
  direction = "both",
  scope = list(upper = fit1.m3, lower = fit2.m3),
  trace = FALSE
)
summary(step.m3)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## popData2019 + `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` +
## `DEATH_DIFF-12` + `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`,
## data = na.omit(data3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53607 -24 -11 0 41083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.023e+01 3.934e+00 2.601 0.00930 **
## `CASE_DIFF-7` 4.632e-01 4.835e-03 95.807 < 2e-16 ***
## `CASE_DIFF-14` 1.093e-01 3.669e-03 29.774 < 2e-16 ***
## `CASE_DIFF-1` -5.604e-01 4.263e-03 -131.469 < 2e-16 ***
## `CASE_DIFF-8` 3.303e-01 4.891e-03 67.535 < 2e-16 ***
## `CASE_DIFF-2` -4.299e-01 4.798e-03 -89.593 < 2e-16 ***
## `CASE_DIFF-9` 1.976e-01 5.031e-03 39.286 < 2e-16 ***
## `CASE_DIFF-6` 5.388e-02 5.052e-03 10.667 < 2e-16 ***
## `CASE_DIFF-12` -1.201e-01 4.749e-03 -25.291 < 2e-16 ***
## `CASE_DIFF-3` -2.582e-01 4.973e-03 -51.922 < 2e-16 ***
## `CASE_DIFF-13` -1.221e-01 4.406e-03 -27.706 < 2e-16 ***
## `CASE_DIFF-4` -1.464e-01 5.066e-03 -28.899 < 2e-16 ***
## `CASE_DIFF-10` 7.124e-02 4.945e-03 14.405 < 2e-16 ***
## `CASE_DIFF-5` -8.185e-02 5.129e-03 -15.959 < 2e-16 ***
## `DEATH_DIFF-8` 2.187e+00 1.115e-01 19.613 < 2e-16 ***
## `DEATH_DIFF-7` 1.948e+00 1.025e-01 18.999 < 2e-16 ***
## `DEATH_DIFF-6` 1.272e+00 9.095e-02 13.985 < 2e-16 ***
## `DEATH_DIFF-9` 1.532e+00 1.156e-01 13.252 < 2e-16 ***
## `DEATH_DIFF-5` 6.381e-01 7.119e-02 8.963 < 2e-16 ***
## `DEATH_DIFF-2` 3.329e-01 6.382e-02 5.216 1.83e-07 ***
## popData2019 1.032e-07 2.410e-08 4.282 1.86e-05 ***
## `CASE_DIFF-11` 9.897e-03 4.891e-03 2.024 0.04302 *
## `DEATH_DIFF-10` 1.281e+00 1.146e-01 11.172 < 2e-16 ***
## `DEATH_DIFF-11` 1.225e+00 1.103e-01 11.112 < 2e-16 ***
## `DEATH_DIFF-12` 1.008e+00 1.020e-01 9.884 < 2e-16 ***
## `DEATH_DIFF-13` 8.632e-01 9.057e-02 9.531 < 2e-16 ***
## `DEATH_DIFF-14` 5.726e-01 7.057e-02 8.114 4.99e-16 ***
## `DEATH_DIFF-1` 1.763e-01 6.419e-02 2.746 0.00604 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 874.1 on 53151 degrees of freedom
## Multiple R-squared: 0.5932, Adjusted R-squared: 0.593
## F-statistic: 2871 on 27 and 53151 DF, p-value: < 2.2e-16
# Compare models: ANOVA table for step.m2 against step.m3, whose candidate
# set additionally contained popData2019.
anova(step.m2,step.m3)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## popData2019 + `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` +
## `DEATH_DIFF-12` + `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 53152 4.0622e+10
## 2 53151 4.0608e+10 1 14005776 18.332 1.859e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 4 - cases, deaths & population density on cases: the model 2
# candidates plus Density (2019 population divided by Area) in place of the
# raw population used for model 3.
# Columns are selected by name so the model does not depend on where they
# happen to sit in covid.time.
data4 <- covid.time[, c(
  "cases_Diff",
  "Density",
  paste("CASE_DIFF", 1:14, sep = "-"),
  paste("DEATH_DIFF", 1:14, sep = "-")
)]
# Full model (upper bound of the search) and intercept-only model (lower
# bound and starting point), both fitted on the complete rows only.
fit1.m4 <- lm(cases_Diff ~ ., data = na.omit(data4))
fit2.m4 <- lm(cases_Diff ~ 1, data = na.omit(data4))
# Stepwise AIC selection in both directions between the two bounds.
step.m4 <- stepAIC(
  fit2.m4,
  direction = "both",
  scope = list(upper = fit1.m4, lower = fit2.m4),
  trace = FALSE
)
summary(step.m4)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`, data = na.omit(data4))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53595 -24 -15 -3 41110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.572354 3.801912 3.833 0.000127 ***
## `CASE_DIFF-7` 0.464288 0.004828 96.157 < 2e-16 ***
## `CASE_DIFF-14` 0.109556 0.003669 29.858 < 2e-16 ***
## `CASE_DIFF-1` -0.559992 0.004262 -131.384 < 2e-16 ***
## `CASE_DIFF-8` 0.331329 0.004886 67.818 < 2e-16 ***
## `CASE_DIFF-2` -0.429274 0.004797 -89.491 < 2e-16 ***
## `CASE_DIFF-9` 0.198530 0.005027 39.491 < 2e-16 ***
## `CASE_DIFF-6` 0.054925 0.005046 10.884 < 2e-16 ***
## `CASE_DIFF-12` -0.119549 0.004748 -25.180 < 2e-16 ***
## `CASE_DIFF-3` -0.257402 0.004970 -51.792 < 2e-16 ***
## `CASE_DIFF-13` -0.121642 0.004406 -27.608 < 2e-16 ***
## `CASE_DIFF-4` -0.145509 0.005063 -28.742 < 2e-16 ***
## `CASE_DIFF-10` 0.071964 0.004943 14.558 < 2e-16 ***
## `CASE_DIFF-5` -0.080894 0.005125 -15.785 < 2e-16 ***
## `DEATH_DIFF-8` 2.188295 0.111527 19.621 < 2e-16 ***
## `DEATH_DIFF-7` 1.949829 0.102564 19.011 < 2e-16 ***
## `DEATH_DIFF-6` 1.273209 0.090964 13.997 < 2e-16 ***
## `DEATH_DIFF-9` 1.532828 0.115625 13.257 < 2e-16 ***
## `DEATH_DIFF-5` 0.639008 0.071197 8.975 < 2e-16 ***
## `DEATH_DIFF-2` 0.333297 0.063829 5.222 1.78e-07 ***
## `CASE_DIFF-11` 0.010553 0.004889 2.158 0.030910 *
## `DEATH_DIFF-10` 1.280985 0.114642 11.174 < 2e-16 ***
## `DEATH_DIFF-11` 1.225222 0.110269 11.111 < 2e-16 ***
## `DEATH_DIFF-12` 1.007917 0.102001 9.881 < 2e-16 ***
## `DEATH_DIFF-13` 0.862914 0.090589 9.526 < 2e-16 ***
## `DEATH_DIFF-14` 0.572329 0.070579 8.109 5.21e-16 ***
## `DEATH_DIFF-1` 0.176808 0.064201 2.754 0.005889 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 874.2 on 53152 degrees of freedom
## Multiple R-squared: 0.5931, Adjusted R-squared: 0.5929
## F-statistic: 2980 on 26 and 53152 DF, p-value: < 2.2e-16
# Compare models: ANOVA table for step.m3 against step.m4 (the selection run
# that was offered Density instead of popData2019). step.m3 is passed first,
# so the second row of the table is reported relative to it.
anova(step.m3,step.m4)
## Analysis of Variance Table
##
## Model 1: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## popData2019 + `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` +
## `DEATH_DIFF-12` + `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`
## Model 2: cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` + `DEATH_DIFF-12` +
## `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 53151 4.0608e+10
## 2 53152 4.0622e+10 -1 -14005776 18.332 1.859e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Final Model & Diagnostic Test
# Coefficient table and fit statistics of the final model (step.m3).
summary(step.m3)
##
## Call:
## lm(formula = cases_Diff ~ `CASE_DIFF-7` + `CASE_DIFF-14` + `CASE_DIFF-1` +
## `CASE_DIFF-8` + `CASE_DIFF-2` + `CASE_DIFF-9` + `CASE_DIFF-6` +
## `CASE_DIFF-12` + `CASE_DIFF-3` + `CASE_DIFF-13` + `CASE_DIFF-4` +
## `CASE_DIFF-10` + `CASE_DIFF-5` + `DEATH_DIFF-8` + `DEATH_DIFF-7` +
## `DEATH_DIFF-6` + `DEATH_DIFF-9` + `DEATH_DIFF-5` + `DEATH_DIFF-2` +
## popData2019 + `CASE_DIFF-11` + `DEATH_DIFF-10` + `DEATH_DIFF-11` +
## `DEATH_DIFF-12` + `DEATH_DIFF-13` + `DEATH_DIFF-14` + `DEATH_DIFF-1`,
## data = na.omit(data3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53607 -24 -11 0 41083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.023e+01 3.934e+00 2.601 0.00930 **
## `CASE_DIFF-7` 4.632e-01 4.835e-03 95.807 < 2e-16 ***
## `CASE_DIFF-14` 1.093e-01 3.669e-03 29.774 < 2e-16 ***
## `CASE_DIFF-1` -5.604e-01 4.263e-03 -131.469 < 2e-16 ***
## `CASE_DIFF-8` 3.303e-01 4.891e-03 67.535 < 2e-16 ***
## `CASE_DIFF-2` -4.299e-01 4.798e-03 -89.593 < 2e-16 ***
## `CASE_DIFF-9` 1.976e-01 5.031e-03 39.286 < 2e-16 ***
## `CASE_DIFF-6` 5.388e-02 5.052e-03 10.667 < 2e-16 ***
## `CASE_DIFF-12` -1.201e-01 4.749e-03 -25.291 < 2e-16 ***
## `CASE_DIFF-3` -2.582e-01 4.973e-03 -51.922 < 2e-16 ***
## `CASE_DIFF-13` -1.221e-01 4.406e-03 -27.706 < 2e-16 ***
## `CASE_DIFF-4` -1.464e-01 5.066e-03 -28.899 < 2e-16 ***
## `CASE_DIFF-10` 7.124e-02 4.945e-03 14.405 < 2e-16 ***
## `CASE_DIFF-5` -8.185e-02 5.129e-03 -15.959 < 2e-16 ***
## `DEATH_DIFF-8` 2.187e+00 1.115e-01 19.613 < 2e-16 ***
## `DEATH_DIFF-7` 1.948e+00 1.025e-01 18.999 < 2e-16 ***
## `DEATH_DIFF-6` 1.272e+00 9.095e-02 13.985 < 2e-16 ***
## `DEATH_DIFF-9` 1.532e+00 1.156e-01 13.252 < 2e-16 ***
## `DEATH_DIFF-5` 6.381e-01 7.119e-02 8.963 < 2e-16 ***
## `DEATH_DIFF-2` 3.329e-01 6.382e-02 5.216 1.83e-07 ***
## popData2019 1.032e-07 2.410e-08 4.282 1.86e-05 ***
## `CASE_DIFF-11` 9.897e-03 4.891e-03 2.024 0.04302 *
## `DEATH_DIFF-10` 1.281e+00 1.146e-01 11.172 < 2e-16 ***
## `DEATH_DIFF-11` 1.225e+00 1.103e-01 11.112 < 2e-16 ***
## `DEATH_DIFF-12` 1.008e+00 1.020e-01 9.884 < 2e-16 ***
## `DEATH_DIFF-13` 8.632e-01 9.057e-02 9.531 < 2e-16 ***
## `DEATH_DIFF-14` 5.726e-01 7.057e-02 8.114 4.99e-16 ***
## `DEATH_DIFF-1` 1.763e-01 6.419e-02 2.746 0.00604 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 874.1 on 53151 degrees of freedom
## Multiple R-squared: 0.5932, Adjusted R-squared: 0.593
## F-statistic: 2871 on 27 and 53151 DF, p-value: < 2.2e-16
# Diagnostic test: draw the diagnostic plots of the final model (step.m3);
# the sub-caption is set to an empty string.
plot(step.m3,sub.caption = "")
Model Presentation
| Model Name | Res.DF | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 1 | 52547 | 39.17 | ||||
| 2 | 52534 | 38.78 | 13 | 388052910 | 40.438 | <2.2e-16 *** |
| Model Name | Res.DF | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 2 | 52534 | 38.78 | ||||
| 3 | 52533 | 38.77 | 1 | 11207507 | 15.187 | 9.75e-05 *** |
| Model Name | Res.DF | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|---|
| 3 | 52533 | 38.77 | ||||
| 4 | 52534 | 38.78 | -1 | -11207507 | 15.187 | 9.75e-05 *** |
# Side-by-side text table of the coefficients of models 1, 2 and 3.
stargazer(
  step.m1, step.m2, step.m3,
  type = "text",
  title = "Results of Model 1 & 2 & 3",
  align = TRUE
)
##
## Results of Model 1 & 2 & 3
## =============================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------
## cases_Diff
## (1) (2) (3)
## -------------------------------------------------------------------------------------------------------------
## `CASE_DIFF-7` 0.489*** 0.464*** 0.463***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-14` 0.114*** 0.110*** 0.109***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-1` -0.552*** -0.560*** -0.560***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-8` 0.357*** 0.331*** 0.330***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-2` -0.420*** -0.429*** -0.430***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-9` 0.212*** 0.199*** 0.198***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-6` 0.070*** 0.055*** 0.054***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-12` -0.118*** -0.120*** -0.120***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-3` -0.254*** -0.257*** -0.258***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-13` -0.118*** -0.122*** -0.122***
## (0.004) (0.004) (0.004)
##
## `CASE_DIFF-4` -0.146*** -0.146*** -0.146***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-10` 0.078*** 0.072*** 0.071***
## (0.005) (0.005) (0.005)
##
## `CASE_DIFF-5` -0.074*** -0.081*** -0.082***
## (0.005) (0.005) (0.005)
##
## `DEATH_DIFF-8` 2.188*** 2.187***
## (0.112) (0.112)
##
## `DEATH_DIFF-7` 1.950*** 1.948***
## (0.103) (0.103)
##
## `DEATH_DIFF-6` 1.273*** 1.272***
## (0.091) (0.091)
##
## `DEATH_DIFF-9` 1.533*** 1.532***
## (0.116) (0.116)
##
## `DEATH_DIFF-5` 0.639*** 0.638***
## (0.071) (0.071)
##
## `DEATH_DIFF-2` 0.333*** 0.333***
## (0.064) (0.064)
##
## popData2019 0.00000***
## (0.00000)
##
## `CASE_DIFF-11` 0.014*** 0.011** 0.010**
## (0.005) (0.005) (0.005)
##
## `DEATH_DIFF-10` 1.281*** 1.281***
## (0.115) (0.115)
##
## `DEATH_DIFF-11` 1.225*** 1.225***
## (0.110) (0.110)
##
## `DEATH_DIFF-12` 1.008*** 1.008***
## (0.102) (0.102)
##
## `DEATH_DIFF-13` 0.863*** 0.863***
## (0.091) (0.091)
##
## `DEATH_DIFF-14` 0.572*** 0.573***
## (0.071) (0.071)
##
## `DEATH_DIFF-1` 0.177*** 0.176***
## (0.064) (0.064)
##
## Constant 15.682*** 14.572*** 10.232***
## (3.821) (3.802) (3.934)
##
## -------------------------------------------------------------------------------------------------------------
## Observations 53,179 53,179 53,179
## R2 0.589 0.593 0.593
## Adjusted R2 0.589 0.593 0.593
## Residual Std. Error 878.732 (df = 53164) 874.216 (df = 53152) 874.074 (df = 53151)
## F Statistic 5,436.861*** (df = 14; 53164) 2,979.502*** (df = 26; 53152) 2,870.764*** (df = 27; 53151)
## =============================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the coefficients of models 3 and 4.
stargazer(
  step.m3, step.m4,
  type = "text",
  title = "Results of Model 3 & 4",
  align = TRUE
)
##
## Results of Model 3 & 4
## ===============================================================================
## Dependent variable:
## -----------------------------------------------------------
## cases_Diff
## (1) (2)
## -------------------------------------------------------------------------------
## `CASE_DIFF-7` 0.463*** 0.464***
## (0.005) (0.005)
##
## `CASE_DIFF-14` 0.109*** 0.110***
## (0.004) (0.004)
##
## `CASE_DIFF-1` -0.560*** -0.560***
## (0.004) (0.004)
##
## `CASE_DIFF-8` 0.330*** 0.331***
## (0.005) (0.005)
##
## `CASE_DIFF-2` -0.430*** -0.429***
## (0.005) (0.005)
##
## `CASE_DIFF-9` 0.198*** 0.199***
## (0.005) (0.005)
##
## `CASE_DIFF-6` 0.054*** 0.055***
## (0.005) (0.005)
##
## `CASE_DIFF-12` -0.120*** -0.120***
## (0.005) (0.005)
##
## `CASE_DIFF-3` -0.258*** -0.257***
## (0.005) (0.005)
##
## `CASE_DIFF-13` -0.122*** -0.122***
## (0.004) (0.004)
##
## `CASE_DIFF-4` -0.146*** -0.146***
## (0.005) (0.005)
##
## `CASE_DIFF-10` 0.071*** 0.072***
## (0.005) (0.005)
##
## `CASE_DIFF-5` -0.082*** -0.081***
## (0.005) (0.005)
##
## `DEATH_DIFF-8` 2.187*** 2.188***
## (0.112) (0.112)
##
## `DEATH_DIFF-7` 1.948*** 1.950***
## (0.103) (0.103)
##
## `DEATH_DIFF-6` 1.272*** 1.273***
## (0.091) (0.091)
##
## `DEATH_DIFF-9` 1.532*** 1.533***
## (0.116) (0.116)
##
## `DEATH_DIFF-5` 0.638*** 0.639***
## (0.071) (0.071)
##
## `DEATH_DIFF-2` 0.333*** 0.333***
## (0.064) (0.064)
##
## popData2019 0.00000***
## (0.00000)
##
## `CASE_DIFF-11` 0.010** 0.011**
## (0.005) (0.005)
##
## `DEATH_DIFF-10` 1.281*** 1.281***
## (0.115) (0.115)
##
## `DEATH_DIFF-11` 1.225*** 1.225***
## (0.110) (0.110)
##
## `DEATH_DIFF-12` 1.008*** 1.008***
## (0.102) (0.102)
##
## `DEATH_DIFF-13` 0.863*** 0.863***
## (0.091) (0.091)
##
## `DEATH_DIFF-14` 0.573*** 0.572***
## (0.071) (0.071)
##
## `DEATH_DIFF-1` 0.176*** 0.177***
## (0.064) (0.064)
##
## Constant 10.232*** 14.572***
## (3.934) (3.802)
##
## -------------------------------------------------------------------------------
## Observations 53,179 53,179
## R2 0.593 0.593
## Adjusted R2 0.593 0.593
## Residual Std. Error 874.074 (df = 53151) 874.216 (df = 53152)
## F Statistic 2,870.764*** (df = 27; 53151) 2,979.502*** (df = 26; 53152)
## ===============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01